*******************************************************************************************************************************
* This replication file estimates the change in sales and consumer surplus due to exit when transportation costs are 75 Euro. *
*******************************************************************************************************************************

*************************
* 0 Set path to files *
*************************
clear all
capture log close
set more off
set trace off
mata: mata set matastrict off
cd $main_directory

*****************************************************************************************************************************
* 1. Predict changes in output and surplus from removal based on a model with just 1 one product (i.e. Q=all transactions). *
*****************************************************************************************************************************

*********************************************************************************************
* 1.1 Set predetermined values and calculate all variables necessary for demand prediction. *
*********************************************************************************************
clear all
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"
global transportation_estimate=75

//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters_t`=$transportation_estimate'", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
//Set predetermined values 
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

//Calculate the total demand on the market
qui su Q if common==1
sca sumQ=r(sum)
sca di sumQ
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma=scale*sumQ/sumPopulation
sca di gamma
//Calculate the market size (i.e. number of possible transactions)
gen market_size=gamma*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand=Q 
drop if firm_demand==.
drop if firm_demand<=0

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

// Controls
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total firm_ID firm_demand market_ID_num market_size $controls_utility

***********************************************************************************************
* 1.2 Remove notaries from existing offices and calculate counterfactual sales of every firm. *
***********************************************************************************************
//Generate unique firm_IDs and store them in a vector named "firm_IDs" in mata.
preserve
sort firm_ID
collapse Q, by(firm_ID)
mata:	firm_IDs    = st_data(., "firm_ID") 
restore

//Generate unique market_IDs and store them in a vector named "market_IDs" in mata.
preserve
sort market_ID_num
collapse Q, by(market_ID)
mata:	market_IDs    = st_data(., "market_ID") 
restore

//Create an empty matrix in which to store the expected sales of each notary office in the counterfactuals.
//Each row i of "sales_Q_remove" corresponds to the sales data if one notary is removed from office i.
//The element i,j of "sales_Q_remove" contains the sales of firm j if one notary is removed from office i.
//First make an empty square matrix (n_firms x n_firms) in which to store the results
mata: length(market_IDs)
mata: sales_Q_remove=J(length(firm_IDs),length(firm_IDs),.) 

//Go over each notary office, remove one notary, calculate the sales for all notaries under the counterfactual and repeat for the next office.
gen touse=. //This is a marker for which observations should be included, since single-notary offices will be removed completely if a notary is removed.
gen NotPerOff_counter=NotPerOff-1 //In the counterfactual, there will be one fewer notary in the office for which the counterfactual is being calculated.

levelsof firm_ID, local(firm_identifier) //Generate a local variable called firm_identifier, which contains the firm_IDs of all notaries. We will use this in the loop.
levelsof market_ID, local(market_identifier)  //Generate a local variable called market_identifier, which contains the market_IDs of all markets. We will use this in the loop.

foreach l of local firm_identifier { //Go over each firm to remove a notary.
di `l' //Display the office ID in order to track how the code is progressing.
sca drop_ID=`l' //Define drop_ID as the identifier of the notary office in which one notary will be removed.
qui replace touse=1 if firm_ID!=drop_ID //Keep all observations in which no notary will be removed.
qui replace touse=0 if firm_ID==drop_ID & NotPerOff_counter<1 //Drop the notary office if the counterfactual number of notaries is less than 1.
qui replace touse=1 if firm_ID==drop_ID & NotPerOff_counter>0 //Keep the notary office if, even after removal, there is at least one notary in the office.
qui replace lnNotPerOff=log(NotPerOff) //Calculate the log of the notaries per office to use in the demand estimation.
qui replace lnNotPerOff=log(NotPerOff_counter) if firm_ID==drop_ID & NotPerOff_counter>0 //For the notary office in which a notary will be removed, use the counterfactual variable to calculate the log. If the number of notaries is zero, this office will not be used anyway.

//Calculate output using the counterfactual dataset.
sort market_ID
mata {
 drop_ID=st_numscalar("drop_ID") //Store the ID of the notary which will be removed in Mata.
 X    = st_data(., "$controls_total","touse") //This selects the corresponding control variables.
 nx    = rows(X) //This calculates the number of observations (used to generate a unit vector for the constant).
 X    = X,J(nx,1,1) //This adds a constant to the control variables.
 Results=spdemand_log(X,betas_Q) //Results[,1] is the ID of the notary //Results[,2] is the log of the estimated demand per notary//Results[,3] is the log of the actual observed demand.
 n_firms=rows(Results) //The number of rows corresponds to the number of firms. This may be lower than in the original dataset if a notary office had to close due to the removal of a notary.
 st_numscalar("n_firms",n_firms) //Export the number of firms into the scalar n_firms.
	
	//The outer loop goes over all firms, to find the row in which the counterfactual output should be saved (the one where the row ID corresponds to the drop ID)
	for(i=1; i<=length(firm_IDs); i++) { //i goes over the vector of firm IDs, to find the one where a notary was removed.
 if (drop_ID==firm_IDs[i]) { //Here we identify the row i, in which the ID of the firm is the same as the ID of the office where a notary was removed.
 
 //The inner loop then goes over all firms and adds their counterfactual output.
	for(f=1; f<=length(firm_IDs); f++) { //f goes over all notary IDs
		for(j=1; j<=n_firms; j++) { //j goes over all notaries in the results to find the estimates for competitors and the firm itself (note that j can be smaller than f, if an office had to close).
	if (firm_IDs[f]==Results[j,1]) { //If the firm ID f, corresponds to the firm ID recorded in the results table, then add the sales in element i,f of the sales matrix.
	sales_Q_remove[i,f]=round(exp(Results[j,2]),0.01) //The estimated log of sales is transformed. First, we take the exponent to get the actual sales and then we round to the second digit after the decimal point to save space.
	}
	}
  } //End of inner loop
  
 }
} //End of outer loop
} //End of mata commands
} //End of loop over all firms

mata: mata matsave "Data_Belgium\Generated_data\sales_Q_remove_t`=$transportation_estimate'" sales_Q_remove, replace

qui replace lnNotPerOff=log(NotPerOff) //Make sure that no permanent changes are made.

*******************************************************************************************************************************************************************************
* 1.3 Remove notaries from existing offices and calculate counterfactual consumer surplus on the market: structure is similar to the one above. For comments, check point 1.2 *
*******************************************************************************************************************************************************************************
clear all
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"
//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters_t`=$transportation_estimate'", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
//Set predetermined values 
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

//Calculate the total demand on the market
qui su Q if common==1
sca sumQ=r(sum)
sca di sumQ
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma=scale*sumQ/sumPopulation
sca di gamma
//Calculate the market size (i.e. number of possible transactions)
gen market_size=gamma*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand=Q 
drop if firm_demand==.
drop if firm_demand<=0

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

// Controls
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total firm_ID firm_demand market_ID_num market_size $controls_utility

levelsof firm_ID, local(firm_identifier) //Generate a local variable called firm_identifier, which contains the firm_IDs of all notaries. We will use this in the loop.

//Generate unique firm_IDs
preserve
sort firm_ID
collapse Q, by(firm_ID)
mata:	firm_IDs    = st_data(., "firm_ID") 
restore

//Generate unique market_IDs
preserve
sort market_ID
collapse Q, by(market_ID)
mata:	market_IDs    = st_data(., "market_ID") 
restore

//Make a matrix in which to store the consumer surplus change due to removal of a notary from each of the firms.
mata: CS_total_remove=J(length(firm_IDs),1,.)
gen touse=.
gen NotPerOff_counter=NotPerOff-1
foreach l of local firm_identifier {
di `l'
sca drop_ID=`l'
qui replace touse=1 if firm_ID!=drop_ID
qui replace touse=0 if firm_ID==drop_ID & NotPerOff_counter<1
qui replace touse=1 if firm_ID==drop_ID & NotPerOff_counter>0
qui replace lnNotPerOff=log(NotPerOff)
qui replace lnNotPerOff=log(NotPerOff_counter) if firm_ID==drop_ID & NotPerOff_counter>0

//Surplus calculation
sort market_ID
mata {
 drop_ID=st_numscalar("drop_ID")
 X    = st_data(., "$controls_total","touse") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
	CS_matrix=consumer_surplus(X,betas_Q,transportation_cost)
    CS_total=sum(CS_matrix[,2])
	for(i=1; i<=length(firm_IDs); i++) { //i goes over the firm combinations to check which notary was added
 if (drop_ID==firm_IDs[i]) {
	CS_total_remove[i,1]=CS_total
	}
	}
  }
 }
mata: mata matsave "Data_Belgium\Generated_data\CS_total_remove_t`=$transportation_estimate'" CS_total_remove, replace
qui replace lnNotPerOff=log(NotPerOff)

*********************************************************************************************
* 2. Predict changes in output and surplus from removal for real-estate products (i.e. Q1). *
*********************************************************************************************

*********************************************************************************************
* 2.1 Set predetermined values and calculate all variables necessary for demand prediction. *
*********************************************************************************************
clear all
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"

//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters_t`=$transportation_estimate'", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
//Set predetermined values 
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

//Calculate the total demand on the market
qui su Q1 if common==1
sca sumQ1=r(sum)
sca di sumQ1
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma=scale*sumQ1/sumPopulation
sca di gamma
//Calculate the market size (i.e. number of possible transactions)
gen market_size=gamma*population

//Define the product for which demand will be estimated (in this case real-estate transactions).
gen firm_demand=Q1 
drop if firm_demand==.
drop if firm_demand<=0

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

// Controls
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total firm_ID firm_demand market_ID_num market_size $controls_utility

***********************************************************************************************
* 2.2 Remove notaries from existing offices and calculate counterfactual sales of every firm. *
***********************************************************************************************
//Generate unique firm_IDs and store them in a vector named "firm_IDs" in mata.
preserve
sort firm_ID
collapse Q1, by(firm_ID)
mata:	firm_IDs    = st_data(., "firm_ID") 
restore

//Generate unique market_IDs and store them in a vector named "market_IDs" in mata.
preserve
sort market_ID_num
collapse Q1, by(market_ID)
mata:	market_IDs    = st_data(., "market_ID") 
restore

//Create an empty matrix in which to store the expected sales of each notary office in the counterfactuals.
//Each row i of "sales_Q1_remove" corresponds to the sales data if one notary is removed from office i.
//The element i,j of "sales_Q1_remove" contains the sales of firm j if one notary is removed from office i.
mata: length(market_IDs)
mata: sales_Q1_remove=J(length(firm_IDs),length(firm_IDs),.) //First make an empty square matrix (n_firms x n_firms)

//Go over each notary office, remove one notary, calculate the sales for all notaries under the counterfactual and repeat for the next office.
gen touse=. //This is a marker for which observations should be included, since some offices will be removed completely if a notary is removed.
gen NotPerOff_counter=NotPerOff-1 //In the counterfactual, there will be one fewer notary in the office for which the counterfactual is being calculated.

levelsof firm_ID, local(firm_identifier) //Generate a local variable called firm_identifier, which contains the firm_IDs of all notaries. We will use this in the loop.
levelsof market_ID, local(market_identifier)  //Generate a local variable called market_identifier, which contains the market_IDs of all markets. We will use this in the loop.

foreach l of local firm_identifier { //Go over each firm to remove a notary.
di `l' //Display the office ID in order to track how the code is progressing.
sca drop_ID=`l' //Define drop_ID as the identifier of the notary office in which one notary will be removed.
qui replace touse=1 if firm_ID!=drop_ID //Keep all observations in which no notary will be removed.
qui replace touse=0 if firm_ID==drop_ID & NotPerOff_counter<1 //Drop the notary office if the counterfactual number of notaries is less than 1.
qui replace touse=1 if firm_ID==drop_ID & NotPerOff_counter>0 //Keep the notary office if, even after removal, there is at least one notary in the office.
qui replace lnNotPerOff=log(NotPerOff) //Calculate the log of the notaries per office to use in the regression.
qui replace lnNotPerOff=log(NotPerOff_counter) if firm_ID==drop_ID & NotPerOff_counter>0 //For the notary office in which a notary will be removed, use the counterfactual variable to calculate the log.

//Calculate output using the counterfactual dataset.
sort market_ID
mata {
 drop_ID=st_numscalar("drop_ID") //Store the ID of the notary which will be removed in Mata.
 X    = st_data(., "$controls_total","touse") //This selects the corresponding control variables.
 nx    = rows(X) //This calculates the number of observations (used to generate a unit vector for the constant).
 X    = X,J(nx,1,1) //This adds a constant to the control variables.
 Results=spdemand_log(X,betas_Q1) //Results[,1] is the ID of the notary //Results[,2] is the log of the estimated demand per notary//Results[,3] is the log of the actual observed demand.
 n_firms=rows(Results) //The number of rows corresponds to the number of firms. This may be lower than in the original dataset if a notary office had to close due to the removal of a notary.
 st_numscalar("n_firms",n_firms) //Export the number of firms into the scalar n_firms.
	
	//The outer loop goes over all firms, to find the row in which the counterfactual output should be saved (the one where the row ID corresponds to the drop ID)
	for(i=1; i<=length(firm_IDs); i++) { //i goes over the vector of firm IDs, to find the one where a notary was removed.
 if (drop_ID==firm_IDs[i]) { //Here we identify the row i, in which the ID of the firm is the same as the ID of the office where a notary was removed.
 
 //The inner loop then goes over all firms and adds their counterfactual output.
	for(f=1; f<=length(firm_IDs); f++) { //f goes over all notary IDs
		for(j=1; j<=n_firms; j++) { //j goes over all notaries in the results to find the estimates for competitors and the firm itself (note that j can be smaller than f, if an office had to close).
	if (firm_IDs[f]==Results[j,1]) { //If the firm ID f, corresponds to the firm ID recorded in the results table, then add the sales in element i,f of the sales matrix.
	sales_Q1_remove[i,f]=round(exp(Results[j,2]),0.01) //The estimated log of sales is transformed. First, we take the exponent to get the actual sales and then we round to the second digit after the decimal point to save space.
	}
	}
  } //End of inner loop
  
 }
} //End of outer loop
} //End of mata commands
} //End of loop over all firms

mata: mata matsave "Data_Belgium\Generated_data\sales_Q1_remove_t`=$transportation_estimate'" sales_Q1_remove, replace

qui replace lnNotPerOff=log(NotPerOff) //Make sure that no permanent changes are made.

*******************************************************************************************************************************************************************************
* 2.3 Remove notaries from existing offices and calculate counterfactual consumer surplus on the market: structure is similar to the one above. For comments, check point 1.2 *
*******************************************************************************************************************************************************************************
clear all
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"

//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters_t`=$transportation_estimate'", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
//Set predetermined values 
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

//Calculate the total demand on the market
qui su Q1 if common==1
sca sumQ1=r(sum)
sca di sumQ1
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma=scale*sumQ1/sumPopulation
sca di gamma
//Calculate the market size (i.e. number of possible transactions)
gen market_size=gamma*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand=Q1
drop if firm_demand==.
drop if firm_demand<=0

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)


// Controls
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total firm_ID firm_demand market_ID_num market_size $controls_utility

levelsof firm_ID, local(firm_identifier) //Generate a local variable called firm_identifier, which contains the firm_IDs of all notaries. We will use this in the loop.

//Generate unique firm_IDs
preserve
sort firm_ID
collapse Q1, by(firm_ID)
mata:	firm_IDs    = st_data(., "firm_ID") 
restore

//Generate unique market_IDs
preserve
sort market_ID
collapse Q1, by(market_ID)
mata:	market_IDs    = st_data(., "market_ID") 
restore

//Make a matrix in which to store the consumer surplus change due to removal of a notary from each of the firms.
mata: CS_total_Q1_remove=J(length(firm_IDs),1,.)
gen touse=.
gen NotPerOff_counter=NotPerOff-1
foreach l of local firm_identifier {
di `l'
sca drop_ID=`l'
qui replace touse=1 if firm_ID!=drop_ID
qui replace touse=0 if firm_ID==drop_ID & NotPerOff_counter<1
qui replace touse=1 if firm_ID==drop_ID & NotPerOff_counter>0
qui replace lnNotPerOff=log(NotPerOff)
qui replace lnNotPerOff=log(NotPerOff_counter) if firm_ID==drop_ID & NotPerOff_counter>0

//Surplus calculation
sort market_ID
mata {
 drop_ID=st_numscalar("drop_ID")
 X    = st_data(., "$controls_total","touse") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
	CS_matrix=consumer_surplus(X,betas_Q1,transportation_cost)
    CS_total=sum(CS_matrix[,2])
	for(i=1; i<=length(firm_IDs); i++) { //i goes over the firm combinations to check which notary was added
 if (drop_ID==firm_IDs[i]) {
	CS_total_Q1_remove[i,1]=CS_total
	}
	}
  }
 }
mata: mata matsave "Data_Belgium\Generated_data\CS_total_Q1_remove_t`=$transportation_estimate'" CS_total_Q1_remove, replace
qui replace lnNotPerOff=log(NotPerOff)

*******************************************************************************************
* 3. Predict changes in output and surplus from removal for other transactions (i.e. Q2). *
*******************************************************************************************

*********************************************************************************************
* 3.1 Set predetermined values and calculate all variables necessary for demand prediction. *
*********************************************************************************************
clear all
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"
//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters_t`=$transportation_estimate'", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

//Calculate the total demand on the market
qui su Q2 if common==1
sca sumQ2=r(sum)
sca di sumQ2
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma=scale*sumQ2/sumPopulation
sca di gamma
//Calculate the market size (i.e. number of possible transactions)
gen market_size=gamma*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand=Q2 
drop if firm_demand==.
drop if firm_demand<=0

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

// Controls
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total firm_ID firm_demand market_ID_num market_size $controls_utility

***********************************************************************************************
* 3.2 Remove notaries from existing offices and calculate counterfactual sales of every firm. *
***********************************************************************************************
//Generate unique firm_IDs and store them in a vector named "firm_IDs" in mata.
preserve
sort firm_ID
collapse Q2, by(firm_ID)
mata:	firm_IDs    = st_data(., "firm_ID") 
restore

//Generate unique market_IDs and store them in a vector named "market_IDs" in mata.
preserve
sort market_ID_num
collapse Q2, by(market_ID)
mata:	market_IDs    = st_data(., "market_ID") 
restore

//Create an empty matrix in which to store the expected sales of each notary office in the counterfactuals.
//Each row i of "sales_Q2_remove" corresponds to the sales data if one notary is removed from office i.
//The element i,j of "sales_Q2_remove" contains the sales of firm j if one notary is removed from office i.
mata: length(market_IDs)
mata: sales_Q2_remove=J(length(firm_IDs),length(firm_IDs),.) //First make an empty square matrix (n_firms x n_firms)

//Go over each notary office, remove one notary, calculate the sales for all notaries under the counterfactual and repeat for the next office.
gen touse=. //This is a marker for which observations should be included, since some offices will be removed completely if a notary is removed.
gen NotPerOff_counter=NotPerOff-1 //In the counterfactual, there will be one fewer notary in the office for which the counterfactual is being calculated.

levelsof firm_ID, local(firm_identifier) //Generate a local variable called firm_identifier, which contains the firm_IDs of all notaries. We will use this in the loop.
levelsof market_ID, local(market_identifier)  //Generate a local variable called market_identifier, which contains the market_IDs of all markets. We will use this in the loop.

foreach l of local firm_identifier { //Go over each firm to remove a notary.
di `l' //Display the office ID in order to track how the code is progressing.
sca drop_ID=`l' //Define drop_ID as the identifier of the notary office in which one notary will be removed.
qui replace touse=1 if firm_ID!=drop_ID //Keep all observations in which no notary will be removed.
qui replace touse=0 if firm_ID==drop_ID & NotPerOff_counter<1 //Drop the notary office if the counterfactual number of notaries is less than 1.
qui replace touse=1 if firm_ID==drop_ID & NotPerOff_counter>0 //Keep the notary office if, even after removal, there is at least one notary in the office.
qui replace lnNotPerOff=log(NotPerOff) //Calculate the log of the notaries per office to use in the regression.
qui replace lnNotPerOff=log(NotPerOff_counter) if firm_ID==drop_ID & NotPerOff_counter>0 //For the notary office in which a notary will be removed, use the counterfactual variable to calculate the log.

//Calculate output using the counterfactual dataset.
sort market_ID
mata {
 drop_ID=st_numscalar("drop_ID") //Store the ID of the notary which will be removed in Mata.
 X    = st_data(., "$controls_total","touse") //This selects the corresponding control variables.
 nx    = rows(X) //This calculates the number of observations (used to generate a unit vector for the constant).
 X    = X,J(nx,1,1) //This adds a constant to the control variables.
 Results=spdemand_log(X,betas_Q2) //Results[,1] is the ID of the notary //Results[,2] is the log of the estimated demand per notary//Results[,3] is the log of the actual observed demand.
 n_firms=rows(Results) //The number of rows corresponds to the number of firms. This may be lower than in the original dataset if a notary office had to close due to the removal of a notary.
 st_numscalar("n_firms",n_firms) //Export the number of firms into the scalar n_firms.
	
	//The outer loop goes over all firms to find the row in which the counterfactual output should be saved (the one where the row ID corresponds to the drop ID)
	for(i=1; i<=length(firm_IDs); i++) { //i goes over the vector of firm IDs, to find the one where a notary was removed.
 if (drop_ID==firm_IDs[i]) { //Here we identify the row i, in which the ID of the firm is the same as the ID of the office where a notary was removed.
 
 //The inner loop then goes over all firms and adds their counterfactual output.
	for(f=1; f<=length(firm_IDs); f++) { //f goes over all notary IDs
		for(j=1; j<=n_firms; j++) { //j goes over all notaries in the results to find the estimates for competitors and the firm itself (note that j can be smaller than f, if an office had to close).
	if (firm_IDs[f]==Results[j,1]) { //If the firm ID f, corresponds to the firm ID recorded in the results table, then add the sales in element i,f of the sales matrix.
	sales_Q2_remove[i,f]=round(exp(Results[j,2]),0.01) //The estimated log of sales is transformed. First, we take the exponent to get the actual sales and then we round to the second digit after the decimal point to save space.
	}
	}
  } //End of inner loop
  
 }
} //End of outer loop
} //End of mata commands
} //End of loop over all firms

mata: mata matsave "Data_Belgium\Generated_data\sales_Q2_remove_t`=$transportation_estimate'" sales_Q2_remove, replace

qui replace lnNotPerOff=log(NotPerOff) //Make sure that no permanent changes are made.

*******************************************************************************************************************************************************************************
* 2.3 Remove notaries from existing offices and calculate counterfactual consumer surplus on the market: structure is similar to the one above. For comments, check point 1.2 *
*******************************************************************************************************************************************************************************
clear all
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"
//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters_t`=$transportation_estimate'", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

//Calculate the total demand on the market
qui su Q2 if common==1
sca sumQ2=r(sum)
sca di sumQ2
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma=scale*sumQ2/sumPopulation
sca di gamma
//Calculate the market size (i.e. number of possible transactions)
gen market_size=gamma*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand=Q2
drop if firm_demand==.
drop if firm_demand<=0

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

// Controls
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total firm_ID firm_demand market_ID_num market_size $controls_utility

levelsof firm_ID, local(firm_identifier) //Generate a local variable called firm_identifier, which contains the firm_IDs of all notaries. We will use this in the loop.

//Generate unique firm_IDs
preserve
sort firm_ID
collapse Q2, by(firm_ID)
mata:	firm_IDs    = st_data(., "firm_ID") 
restore

//Generate unique market_IDs
preserve
sort market_ID
collapse Q2, by(market_ID)
mata:	market_IDs    = st_data(., "market_ID") 
restore

//Make a matrix in which to store the consumer surplus change due to removal of a notary from each of the firms.
mata: CS_total_Q2_remove=J(length(firm_IDs),1,.)
gen touse=.
gen NotPerOff_counter=NotPerOff-1
foreach l of local firm_identifier {
di `l'
sca drop_ID=`l'
qui replace touse=1 if firm_ID!=drop_ID
qui replace touse=0 if firm_ID==drop_ID & NotPerOff_counter<1
qui replace touse=1 if firm_ID==drop_ID & NotPerOff_counter>0
qui replace lnNotPerOff=log(NotPerOff)
qui replace lnNotPerOff=log(NotPerOff_counter) if firm_ID==drop_ID & NotPerOff_counter>0

//Surplus calculation
sort market_ID
mata {
 drop_ID=st_numscalar("drop_ID")
 X    = st_data(., "$controls_total","touse") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
	CS_matrix=consumer_surplus(X,betas_Q2,transportation_cost)
    CS_total=sum(CS_matrix[,2])
	for(i=1; i<=length(firm_IDs); i++) { //i goes over the firm combinations to check which notary was added
 if (drop_ID==firm_IDs[i]) {
	CS_total_Q2_remove[i,1]=CS_total
	}
	}
  }
 }
mata: mata matsave "Data_Belgium\Generated_data\CS_total_Q2_remove_t`=$transportation_estimate'" CS_total_Q2_remove, replace
qui replace lnNotPerOff=log(NotPerOff)

